Concept

This version ignores the initial clustering, instead using the bam file from the alignment of all data to the mixed human/mouse reference genomes. For each fastq read identifier, the better alignment score is stored for either genome. The script then iterates through the paired fastq file, partitioning the reads to separate fastq pairs.
These are then run though cellranger count using only the appropriate genomes.

Load human genome split of samples, labeling with hs1..hs4. Output matrix.
Convert to common mouse genes. Re-load into object. Load mouse genome split of samples, labeling with ms1..ms4. Subset overlapping genes. Merge all 8. Normalize and scale. Show clusters labeled by cell; split by sample. Show nCount_RNA also. Determine significantly-different list mouse vs human.

No Intersection

This version eliminates any barcodes intersecting both species during the data table stage, before recreating Seurat objects.

hs1.data=Read10X(data.dir="./hg19/S1/outs/filtered_feature_bc_matrix/")
hs2.data=Read10X(data.dir="./hg19/S2/outs/filtered_feature_bc_matrix/")
hs3.data=Read10X(data.dir="./hg19/S3/outs/filtered_feature_bc_matrix/")
hs4.data=Read10X(data.dir="./hg19/S4/outs/filtered_feature_bc_matrix/")
ms1.data=Read10X(data.dir="./mm10/S1/outs/filtered_feature_bc_matrix/")
ms2.data=Read10X(data.dir="./mm10/S2/outs/filtered_feature_bc_matrix/")
ms3.data=Read10X(data.dir="./mm10/S3/outs/filtered_feature_bc_matrix/")
ms4.data=Read10X(data.dir="./mm10/S4/outs/filtered_feature_bc_matrix/")

#some human gene symbols have underscores (but these are not in geneTrans).
#Substitute a dot so as not to raise an error.
rownames(hs1.data)=gsub("_",".",rownames(hs1.data))
rownames(hs2.data)=gsub("_",".",rownames(hs2.data))
rownames(hs3.data)=gsub("_",".",rownames(hs3.data))
rownames(hs4.data)=gsub("_",".",rownames(hs4.data))

#rename cells
colnames(x=hs1.data) <- paste('hs1',colnames(x=hs1.data),sep="_")
colnames(x=hs2.data) <- paste('hs2',colnames(x=hs2.data),sep="_")
colnames(x=hs3.data) <- paste('hs3',colnames(x=hs3.data),sep="_")
colnames(x=hs4.data) <- paste('hs4',colnames(x=hs4.data),sep="_")
colnames(x=ms1.data) <- paste('ms1',colnames(x=ms1.data),sep="_")
colnames(x=ms2.data) <- paste('ms2',colnames(x=ms2.data),sep="_")
colnames(x=ms3.data) <- paste('ms3',colnames(x=ms3.data),sep="_")
colnames(x=ms4.data) <- paste('ms4',colnames(x=ms4.data),sep="_")

#create objects
hs1=CreateSeuratObject(counts=hs1.data,project="MG",min.cells=5)
hs2=CreateSeuratObject(counts=hs2.data,project="MG",min.cells=5)
hs3=CreateSeuratObject(counts=hs3.data,project="MG",min.cells=5)
hs4=CreateSeuratObject(counts=hs4.data,project="MG",min.cells=5)
ms1=CreateSeuratObject(counts=ms1.data,project="MG",min.cells=5)
ms2=CreateSeuratObject(counts=ms2.data,project="MG",min.cells=5)
ms3=CreateSeuratObject(counts=ms3.data,project="MG",min.cells=5)
ms4=CreateSeuratObject(counts=ms4.data,project="MG",min.cells=5)

#Follow https://satijalab.org/seurat/essential_commands.html 
hg=merge(x=hs1,y=c(hs2,hs3,hs4),project="Hg")
mg=merge(x=ms1,y=c(ms2,ms3,ms4),project="Mg")

#at this point we don't need all the original sample objects--can re-create if needed
rm(hs1,hs2,hs3,hs4,hs1.data,hs2.data,hs3.data,hs4.data)
rm(ms1,ms2,ms3,ms4,ms1.data,ms2.data,ms3.data,ms4.data)

#summary of each object
print(hg)
## An object of class Seurat 
## 11055 features across 3354 samples within 1 assay 
## Active assay: RNA (11055 features)
print(mg)
## An object of class Seurat 
## 17255 features across 28237 samples within 1 assay 
## Active assay: RNA (17255 features)

Convert human genes to mouse

Create translated raw counts tables

Output raw data to data frame. Use conversion table (geneTrans) to translate human symbols to mouse symbols. Output mouse raw data and filter for genes in translation table.

#load gene translation table
geneTrans=read.table("geneTrans.txt",sep=",",header=T,stringsAsFactors = F,row.names = 1)

mito.m=grep("^mt",rownames(mg),value=T)
mito.h=grep("^MT-",rownames(hg),value=T)
#turns out, these two vectors (n=13 each) are ordered and match, build an extended translation table
mitoTrans=data.frame(row.names = paste("mm10",mito.m,sep="_"),
                     Human.Symbol = mito.h,
                     Homologene_ID = rep(NA,13),
                     None = rep("yes",13),
                     Mouse.Symbol = mito.m,
                     hg19 = paste("hg19",mito.h,sep="_"),
                     mm10 = paste("mm10",mito.m,sep="_")
                     )

#extended translation table
xTrans = rbind(geneTrans,mitoTrans)

#extract human raw counts into table
hg.raw=GetAssayData(hg,slot="counts")

#subset rows in geneTrans (not necessary but simpler)
hg.raw=hg.raw[row.names(hg.raw) %in% xTrans$Human.Symbol,]  #cut from 13283 to 11364 rows

#translate human symbols to mouse
hg.trans=merge(x=hg.raw,y=xTrans[,c(1,4)],by.x=0,by.y="Human.Symbol",all.x=T)
rownames(hg.trans)=hg.trans$Mouse.Symbol
hg.trans=hg.trans[,!(names(hg.trans) %in% c("Row.names","Mouse.Symbol"))]

#extract mouse raw counts
mg.raw=GetAssayData(mg,slot="counts")

#subset rows in xTrans only
mg.raw=mg.raw[row.names(mg.raw) %in% xTrans$Mouse.Symbol,]

dim(hg.trans)
## [1] 9337 3354
dim(mg.raw)
## [1] 14523 28237

Find and remove sample_barcodes common to both data tables

Note that “h” or “m” must be removed to find the overlap.

common=intersect(substring(unique(sort(colnames(hg.trans))),2),substring(unique(sort(colnames(mg.raw))),2))
length(common)
## [1] 412
hg.trans=hg.trans[,! colnames(hg.trans) %in% paste("h",common,sep="")]
mg.raw=mg.raw[, ! colnames(mg.raw) %in% paste("m",common,sep="")]

dim(hg.trans)
## [1] 9337 2942
dim(mg.raw)
## [1] 14523 27825
#convert matrix back into Seurat object
hm=CreateSeuratObject(hg.trans,project="MG",min.cells=5)
mg=CreateSeuratObject(mg.raw,project="MG",min.cells=5)

#merge with mouse
hm=merge(x=hm,y=mg,project="HM")

#clean up objects no longer needed
rm(hg,mg,hg.raw,mito.m,mito.h)

#summarize merged object
print(hm)
## An object of class Seurat 
## 14725 features across 30767 samples within 1 assay 
## Active assay: RNA (14725 features)

Scale and normalize

Use standard workflow to calculate percent.mito (now all mouse symbols) and visualize by sample.

#filter and normalize merged object
mito.features=grep(pattern="^mt",x=rownames(x=hm),value=T)

percent.mito=Matrix::colSums(x=GetAssayData(object=hm,slot="counts")[mito.features,]) / Matrix::colSums(x=GetAssayData(object=hm,slot='counts'))
hm[['percent.mito']] = percent.mito

Diagnostic plots.

VlnPlot(object=hm,features=c("nFeature_RNA","nCount_RNA","percent.mito"),ncol=3)

FeatureScatter(object=hm,feature1 = "nCount_RNA",feature2 = "percent.mito")

FeatureScatter(object=hm,feature1 = "nCount_RNA",feature2 = "nFeature_RNA")

Scale

Print summary data before and after subsetting. Then normalize, find variable genes, and scale.

print(hm)
## An object of class Seurat 
## 14725 features across 30767 samples within 1 assay 
## Active assay: RNA (14725 features)
hm=subset(hm,nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mito < 0.05) #try > 100 & < 2500 & < .5
print(hm)
## An object of class Seurat 
## 14725 features across 9011 samples within 1 assay 
## Active assay: RNA (14725 features)
hm=NormalizeData(hm,normalization.method = "LogNormalize",scale.factor=1e4)
hm=FindVariableFeatures(hm,selection.method = 'mean.var.plot',mean.cutoff = c(0.0125,3),dispersion.cutoff = c(0.5,Inf)) #finds 4047 features
length(VariableFeatures(hm))
## [1] 2363
hm=ScaleData(hm,features=rownames(hm),vars.to.regress = c("nCount_RNA","percent.mito"))  #this takes a while
## Regressing out nCount_RNA, percent.mito
## Scaling data matrix

Dimension reduction

Start with PCA and determine how many dimensions are informative.

hm=RunPCA(hm,features=VariableFeatures(hm),verbose=T)
## PC_ 1 
## Positive:  C1qb, C1qc, C1qa, Cd74, Rpl13a, Cx3cr1, Lst1, Spp1, Cybb, Apoc1 
##     Gpr34, Fcer1g, H2-Ea-ps, Sat1, C3, Alox5ap, Trem2, Fos, Fcgr4, Ptprc 
##     Csf1r, P2ry13, Maf, Cd84, A2m, Csf3r, H2-T23, Adora3, Olfml3, Ccl3 
## Negative:  Gria2, Ptn, Tsc22d1, Meg3, Meis2, Mt3, Spock2, Atp1a2, Ahi1, Pcp4l1 
##     Igfbp7, Flt1, Il1rapl1, Nrxn3, Snap25, Epha5, Cldn5, R3hdm1, Id1, Itm2a 
##     Cpe, Igf1r, Syt1, Epas1, Gng11, Mt2, Fry, Grin2b, Scg5, Sorbs2 
## PC_ 2 
## Positive:  Gria2, Meis2, Meg3, Atp1b1, Il1rapl1, Syt1, Nrxn3, Epha5, Grin2b, Celf4 
##     Ahi1, Scg5, Snap25, Stmn3, Negr1, Rtn1, Ndrg4, Snhg11, Plppr4, Arpp21 
##     Bex2, Mt3, Ank3, Peg3, Gad1, Nap1l5, Synpr, Opcml, Pcsk2, Eml5 
## Negative:  Flt1, Cldn5, Itm2a, Igfbp7, Ptprb, Id1, Abcb1a, Pglyrp1, Klf2, Egfl7 
##     Cxcl12, Adgrf5, Slc2a1, Fn1, Ramp2, Ablim1, Sgms1, Adgrl4, Sox18, Ahnak 
##     Jcad, Spock2, Epas1, Esam, Pecam1, Abcg2, Pltp, Crip1, Kitl, Slc9a3r2 
## PC_ 3 
## Positive:  Syt1, Meg3, Snap25, Ndrg4, Celf4, Gad1, Snhg11, Stmn3, Grin2b, Synpr 
##     Camk2b, Nrxn3, Plppr4, Tpm1, Tmsb10, Atp1a3, Ano3, Eml5, Bcl11a, Pcsk2 
##     Pcp4, Ccsap, C1qtnf4, Snca, Nrip3, Epha5, Gad2, Caly, Atp2b1, Syt6 
## Negative:  Atp1a2, Aldoc, Slc1a2, Mfge8, Mt2, Mt3, Ptn, Cxcl14, Ctsd, Rorb 
##     Gjb6, Slc7a10, Fxyd1, Hexb, Pla2g7, Aqp4, Car2, Slc6a11, Dbi, Msmo1 
##     Trf, Hes5, Fyb, Rnase4, Phkg1, Mlc1, Ctsz, Fjx1, Ctss, Lgmn 
## PC_ 4 
## Positive:  Atp1a2, Ptn, Aldoc, Dbi, Slc1a2, Mt2, Mfge8, Spp1, Cd74, H2-Ea-ps 
##     Cxcl14, Apoc1, Rorb, Mt3, Fxyd1, C3, Hes5, Gjb6, Slc7a10, Cybb 
##     Car2, A2m, Tpm1, Aqp4, Cryab, S100a11, Slc6a11, Msmo1, Arl5a, Fjx1 
## Negative:  Hexb, Ctss, Fyb, Ctsd, Rnase4, Tmem119, Selplg, Lgmn, Vsir, Fcgr3 
##     Cx3cr1, C1qa, C1qc, Csf1r, Cd52, Mafb, P2ry12, Ctsz, C1qb, Tgfbr1 
##     Fcer1g, Ptpn18, Unc93b1, Pou2f2, Ssh2, Arsb, Ly86, Ctsh, Rhoh, P2ry6 
## PC_ 5 
## Positive:  Cldn11, Ermn, Tspan2, Mog, Tubb4a, Ugt8a, Mag, Tmem88b, Mal, Opalin 
##     Cnp, Nkx6-2, Ppp1r14a, Stmn4, Mobp, Kctd13, Qdpr, Gjc3, Grb14, Cryab 
##     Ttll7, Hapln2, Anln, Enpp2, Kcna1, Edil3, Dixdc1, Trf, Sept4, Ndrg1 
## Negative:  Atp1a2, Mt3, Slc1a2, Aldoc, Mt2, Mfge8, Cxcl14, Cpe, Rorb, Gjb6 
##     Slc7a10, Pla2g7, Aqp4, Gria2, Fxyd1, Fjx1, Slc6a11, Hes5, Mlc1, Phkg1 
##     Etnppl, Sfxn5, Itih3, Mgst1, Pdgfrb, Dkk3, Msmo1, St6galnac5, Slc7a11, Igfbp2
print(hm[['pca']],dims=1:5,nfeatures=5,projected=F)
## PC_ 1 
## Positive:  C1qb, C1qc, C1qa, Cd74, Rpl13a 
## Negative:  Gria2, Ptn, Tsc22d1, Meg3, Meis2 
## PC_ 2 
## Positive:  Gria2, Meis2, Meg3, Atp1b1, Il1rapl1 
## Negative:  Flt1, Cldn5, Itm2a, Igfbp7, Ptprb 
## PC_ 3 
## Positive:  Syt1, Meg3, Snap25, Ndrg4, Celf4 
## Negative:  Atp1a2, Aldoc, Slc1a2, Mfge8, Mt2 
## PC_ 4 
## Positive:  Atp1a2, Ptn, Aldoc, Dbi, Slc1a2 
## Negative:  Hexb, Ctss, Fyb, Ctsd, Rnase4 
## PC_ 5 
## Positive:  Cldn11, Ermn, Tspan2, Mog, Tubb4a 
## Negative:  Atp1a2, Mt3, Slc1a2, Aldoc, Mt2
VizDimLoadings(hm,dims=1:2)

DimPlot(hm)

hm=ProjectDim(hm)
## PC_ 1 
## Positive:  Tyrobp, C1qb, B2m, Aif1, C1qc, C1qa, Cd74, Rpl13a, Cx3cr1, Lst1 
##     Spp1, Laptm5, Cybb, Ftl1, Apoc1, Gpr34, Uba52, Fcer1g, H2-Ea-ps, Rpl29 
## Negative:  Gria2, Ptn, Zbtb20, Tcf4, Selenow, Sptbn1, Mt1, mt-Atp6, Tsc22d1, Meg3 
##     Xist, Calm1, Nfib, Sparcl1, Bsg, Dclk1, Pbx1, Atp5j, Rsrp1, Elob 
## PC_ 2 
## Positive:  Gria2, Ckb, Meis2, Meg3, Dclk1, Ank2, Pcsk1n, Atp1b1, Il1rapl1, Syt1 
##     Nrxn3, Nrxn1, Epha5, Celf2, Dlgap1, Grin2b, Cadm2, Celf4, Ahi1, Syt11 
## Negative:  Flt1, Cldn5, Itm2a, Igfbp7, Ptprb, Id1, Abcb1a, Pglyrp1, Klf2, Egfl7 
##     Cxcl12, Adgrf5, Slc2a1, Fn1, Ramp2, Bsg, Ablim1, Sgms1, Adgrl4, Sox18 
## PC_ 3 
## Positive:  Syt1, Meg3, Snap25, Ndrg4, Celf4, Gad1, Snhg11, Stmn3, Grin2b, Synpr 
##     Camk2b, Nrxn3, Plppr4, Tpm1, Tmsb10, Atp1a3, Ano3, Eml5, Bcl11a, Pcsk2 
## Negative:  Cst3, Atp1a2, Apoe, Gja1, Aldoc, Plpp3, Slc1a2, Prdx6, Tsc22d4, Glul 
##     Cd81, Mt1, Slc1a3, Ntsr2, Mfge8, Clu, Mt2, Selenop, Gpr37l1, F3 
## PC_ 4 
## Positive:  Atp1a2, Plpp3, Htra1, Ptn, Gja1, Prdx6, Ptprz1, Aldoc, Gpm6b, Prnp 
##     Slc1a3, Gpr37l1, Dbi, Slc1a2, Neat1, Ntsr2, Mt2, Mfge8, Sparcl1, Clu 
## Negative:  Hexb, Ctss, Fyb, Ctsd, Rnase4, Tmem119, Selplg, Lgmn, Vsir, Fcgr3 
##     Cx3cr1, C1qa, C1qc, Csf1r, Rpl17, Cd52, Serinc3, Mafb, P2ry12, Ctsz 
## PC_ 5 
## Positive:  Cldn11, Ermn, Tspan2, Mog, Tubb4a, Ugt8a, Mag, Plp1, Tmem88b, Mal 
##     Opalin, Cnp, Nkx6-2, Ppp1r14a, Stmn4, Mobp, Kctd13, Qdpr, Gjc3, Grb14 
## Negative:  Atp1a2, Gja1, Mt3, Slc1a2, Aldoc, Mt2, Sparcl1, Ntsr2, Clu, Atp1b2 
##     Plpp3, Slc1a3, Mfge8, S1pr1, Prdx6, Id4, Cxcl14, Sox9, Dclk1, Ntm
DimHeatmap(hm,dims=1,cells=500,balanced=T)

DimHeatmap(hm,dims=1:6,cells=500,balanced=T)

hm=JackStraw(hm,num.replicate=100)
hm=ScoreJackStraw(hm,dims=1:20)
JackStrawPlot(hm,dims=1:20)
## Warning: Removed 33080 rows containing missing values (geom_point).

ElbowPlot(hm)

Start clustering

#start clustering
hm=FindNeighbors(hm,dims=1:13) #adjust dims based on plots
## Computing nearest neighbor graph
## Computing SNN
hm=FindClusters(hm,resolution=0.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 9011
## Number of edges: 310729
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9680
## Number of communities: 13
## Elapsed time: 1 seconds
table(Idents(hm))
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12 
## 2169 1552 1533 1022  540  471  468  420  249  214  199  132   42

Find tSNE projection and add to object

hm=RunTSNE(hm,dims=1:13)
DimPlot(hm,reduction='tsne')

DimPlot(hm,reduction='tsne',split.by='orig.ident')

FeaturePlot(hm,features='Spp1')

FeaturePlot(hm,features='Hexb')

FeaturePlot(hm,features='nCount_RNA')

UMAP

hm=RunUMAP(hm,dims = 1:13)
DimPlot(hm,reduction='umap')

FeaturePlot(hm,features='Spp1')

FeaturePlot(hm,features='Hexb')

#FeaturePlot(hm,features='nCount_RNA',split.by='orig.ident')
FeaturePlot(hm,features='nCount_RNA')

Label clusters

Use top 2 genes from prior clustering to do this, following

hm.markers=FindAllMarkers(hm,only.pos=T,min.pct = .25,logfc.threshold = .25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
## Calculating cluster 12
hm.markers %>% group_by(cluster) %>% top_n(10,avg_logFC)
## # A tibble: 130 x 7
## # Groups:   cluster [13]
##    p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene  
##    <dbl>     <dbl> <dbl> <dbl>     <dbl> <fct>   <chr> 
##  1     0      2.77 0.983 0.211         0 0       Slc1a2
##  2     0      2.55 0.944 0.133         0 0       Aldoc 
##  3     0      2.45 0.972 0.243         0 0       Mt2   
##  4     0      2.42 0.984 0.293         0 0       Mt3   
##  5     0      2.40 0.935 0.124         0 0       Plpp3 
##  6     0      2.38 0.932 0.094         0 0       Gja1  
##  7     0      2.36 0.886 0.105         0 0       Clu   
##  8     0      2.30 0.847 0.069         0 0       Ntsr2 
##  9     0      2.30 0.993 0.518         0 0       Mt1   
## 10     0      2.21 0.972 0.276         0 0       Slc1a3
## # … with 120 more rows

Save top 5 list

hm.markers %>% group_by(cluster) %>% top_n(5,avg_logFC) %>% write.csv("BS2 Top 5 per cluster.csv",row.names = F)
hm.markers %>% group_by(cluster) %>% top_n(5,avg_logFC) %>% as.data.frame %>% paged_table

Identify human and mouse microglial clusters

Use Spp1 and Hexb to identify cluster number for human and mouse microglia, respectively.

humClus=as.character(subset(hm.markers,gene=="Spp1")$cluster)
print(humClus)
## [1] "2"  "11"
musClus=as.character(subset(hm.markers,gene=="Hexb")$cluster)
print(musClus)
## [1] "1"  "12"

Find genes differentially expressed between human and mouse microglia

diffGenes=FindMarkers(hm,ident.1=humClus,ident.2=musClus,min.pct=0.1)

Display summary of differentially expressed genes

table(sig=diffGenes$p_val_adj <= 0.05, twofold=abs(diffGenes$avg_logFC) >= 1)
##        twofold
## sig     FALSE TRUE
##   FALSE    65    0
##   TRUE   1241  175
diffGenes %>% tibble::rownames_to_column() %>% filter(p_val_adj <= 0.05) %>% filter(abs(avg_logFC) >= 1) %>% paged_table

Save differential expressed list as object (for later use in R) and csv (as text to preserve gene names from Excel date conversion)

saveRDS(diffGenes,"BS2_diffGenes.rds")
write.table(as.data.frame(diffGenes),"BS2_diffGenes_csv.txt",sep=",",quote=T)

Cluster Averages

hm.mean=AverageExpression(hm)
## Finished averaging RNA for cluster 0
## Finished averaging RNA for cluster 1
## Finished averaging RNA for cluster 2
## Finished averaging RNA for cluster 3
## Finished averaging RNA for cluster 4
## Finished averaging RNA for cluster 5
## Finished averaging RNA for cluster 6
## Finished averaging RNA for cluster 7
## Finished averaging RNA for cluster 8
## Finished averaging RNA for cluster 9
## Finished averaging RNA for cluster 10
## Finished averaging RNA for cluster 11
## Finished averaging RNA for cluster 12
head(hm.mean$RNA)
##                0           1          2          3           4          5
## A1bg  0.00000000 0.000000000 0.63342559 0.00000000 0.000000000 0.00000000
## A2m   0.06894108 0.005264187 5.49485892 0.01429382 0.024353814 0.01156396
## Aaas  0.02697960 0.052568618 0.10880348 0.08198218 0.004771584 0.04825179
## Aacs  0.20449473 0.048839604 0.04470524 0.05406426 0.076340008 0.05287955
## Aagab 0.09951070 0.285966946 0.06236174 0.12813365 0.132586729 0.07607692
## Aak1  0.56296710 0.304263223 2.09726290 0.46899087 1.311051225 0.78214432
##                6          7          8          9         10         11
## A1bg  0.00000000 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## A2m   0.03299079 0.01779597 0.00000000 0.00000000 0.03437158 0.44817055
## Aaas  0.09609653 0.03793106 0.05837784 0.16014683 0.04049255 0.09980865
## Aacs  0.03992345 0.00000000 0.00000000 0.01681503 0.13537292 0.01713972
## Aagab 0.11837932 0.07577071 0.02400786 0.08681587 0.05165831 0.18034523
## Aak1  0.30504964 1.49403949 0.49686161 0.56087248 0.75075909 0.38341312
##               12
## A1bg  0.00000000
## A2m   0.07135009
## Aaas  0.09396024
## Aacs  0.00000000
## Aagab 0.00000000
## Aak1  0.60570810
write.csv(hm.mean$RNA,"BS2_hm_cluster_averages_csv.txt")

Show species by original split

#stash idents 
hm[["old.ident"]]=Idents(hm)
#get vector of cell idents
all.cells=Cells(hm)
#split by species
hg.cells=grep("^h",all.cells,value=T)
mm.cells=grep("^m",all.cells,value=T)
#apply new idents
Idents(hm,cells=hg.cells)="Human"
Idents(hm,cells=mm.cells)="Mouse"
table(hm$orig.ident,Idents(hm))
##      
##       Mouse Human
##   hs1     0   392
##   hs2     0   259
##   hs3     0   534
##   hs4     0   326
##   ms1  1652     0
##   ms2   717     0
##   ms3  3230     0
##   ms4  1901     0

Plot tsne labeled by species no legend

DimPlot(hm,label=T,repel=T,label.size=8,reduction = "tsne")+NoLegend()

Plot nCounts in tsne labeled by species

FeaturePlot(hm,features='nCount_RNA',reduction = "tsne")

FeaturePlot(hm,features='nCount_RNA',reduction = "tsne",split.by="ident")

Check overlapping cells

Distribution of detected transcripts

To test the level of detection in each cell, we’ll display the distribution of counted transcripts per cells.

#replace counts > 0 with T/F
hg.nz=as.data.frame(hg.trans) > 0
mg.nz=as.data.frame(mg.raw) > 0

#sum the TRUE values for each column (cell)
mg.nz=apply(mg.nz,MARGIN=2,FUN=sum)
hg.nz=apply(hg.nz,MARGIN=2,FUN=sum)

#check distributions
plot(density(log10(hg.nz)))
lines(density(log10(mg.nz)),lty="dashed")

Venn

Finally, we’ll check whether any cell identifiers (sample_barcode) are found in common across the two species.

#Venn
#extract only sample_barcodes with more than 500 genes > 0
hg.x=names(which(hg.nz > 500))
mg.x=names(which(mg.nz > 500))


#strip off species code from sample index and assemble into list
#diffList=list(hg19=sapply(strsplit(hg.x,"_"),`[`,2),mm10=sapply(strsplit(mg.x,"_"),`[`,2))
diffList=list(hg19=unique(sub("h","",hg.x)),mm10=unique(sub("m","",mg.x)))
#generate object
venn.plot=venn.diagram(diffList,filename=NULL,euler.d=T,scaled=T,col='transparent',
                       alpha=.5,fill=c("cornflowerblue","coral2"),
                       fontfamily='Helvetica',cat.fontfamily='Helvetica',cat.cex=3,cex=2)
png("BS2 Venn Overlap.png",width=1200,height=900)
grid.draw(venn.plot) #save to file
dev.off()
## png 
##   2
grid.newpage()
#plot it in output
grid.draw(venn.plot)

Session Info

sessionInfo()
## R version 3.5.3 (2019-03-11)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.2 LTS
## 
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] VennDiagram_1.6.20  futile.logger_1.4.3 rmarkdown_1.12     
## [4] dplyr_0.8.0.1       Seurat_3.0.0.9000  
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-137         tsne_0.1-3           bitops_1.0-6        
##  [4] RColorBrewer_1.1-2   httr_1.4.0           tools_3.5.3         
##  [7] utf8_1.1.4           R6_2.4.0             irlba_2.3.3         
## [10] KernSmooth_2.23-15   lazyeval_0.2.2       colorspace_1.4-1    
## [13] withr_2.1.2          npsurv_0.4-0         tidyselect_0.2.5    
## [16] compiler_3.5.3       cli_1.1.0            formatR_1.6         
## [19] plotly_4.9.0         labeling_0.3         caTools_1.17.1.2    
## [22] scales_1.0.0         lmtest_0.9-36        ggridges_0.5.1      
## [25] pbapply_1.4-0        stringr_1.4.0        digest_0.6.18       
## [28] R.utils_2.8.0        pkgconfig_2.0.2      htmltools_0.3.6     
## [31] bibtex_0.4.2         htmlwidgets_1.3      rlang_0.3.4         
## [34] zoo_1.8-5            jsonlite_1.6         ica_1.0-2           
## [37] gtools_3.8.1         R.oo_1.22.0          magrittr_1.5        
## [40] Matrix_1.2-17        fansi_0.4.0          Rcpp_1.0.1          
## [43] munsell_0.5.0        ape_5.3              reticulate_1.12     
## [46] R.methodsS3_1.7.1    stringi_1.4.3        yaml_2.2.0          
## [49] gbRd_0.4-11          MASS_7.3-51.1        gplots_3.0.1.1      
## [52] Rtsne_0.15           plyr_1.8.4           parallel_3.5.3      
## [55] gdata_2.18.0         listenv_0.7.0        ggrepel_0.8.0       
## [58] crayon_1.3.4         lattice_0.20-38      cowplot_0.9.4       
## [61] splines_3.5.3        SDMTools_1.1-221     knitr_1.22          
## [64] pillar_1.3.1         igraph_1.2.4         future.apply_1.2.0  
## [67] codetools_0.2-16     futile.options_1.0.1 glue_1.3.1          
## [70] evaluate_0.13        lsei_1.2-0           metap_1.1           
## [73] lambda.r_1.2.3       data.table_1.12.2    png_0.1-7           
## [76] Rdpack_0.11-0        gtable_0.3.0         RANN_2.6.1          
## [79] purrr_0.3.2          tidyr_0.8.3          future_1.12.0       
## [82] assertthat_0.2.1     ggplot2_3.1.1        xfun_0.6            
## [85] rsvd_1.0.0           survival_2.43-3      viridisLite_0.3.0   
## [88] tibble_2.1.1         cluster_2.0.8        globals_0.12.4      
## [91] fitdistrplus_1.0-14  ROCR_1.0-7